#importing modules
import sys
import os
import csv
import subprocess
from subprocess import*
from Bio import SeqIO



def _gaps_to_n(record, new_id):
    """Replace every gap ('-') in *record*'s sequence with 'N' and rename it."""
    # Local import: the file only imports SeqIO at module level.
    from Bio.Seq import Seq
    record.seq = Seq(str(record.seq).replace('-', 'N'))
    record.id = new_id
    return record


def _last_non_gap_index(bases):
    """Return the index of the last element of *bases* that is not '-', or -1."""
    for idx in range(len(bases) - 1, -1, -1):
        if bases[idx] != '-':
            return idx
    return -1


def _read_pairing_flags(pairing_path):
    """Read the pairing table and return one 'yes'/'no' flag per data line.

    The first line is skipped; lines are split on tabs and the field at
    index 4 decides the flag (0 -> 'no', anything else -> 'yes').
    """
    with open(pairing_path, "r") as handle:
        lines = handle.readlines()
    flags = []
    for line in lines[1:]:
        if not line.strip():
            continue        # tolerate blank lines (e.g. at end of file)
        fields = line.split('\t')
        flags.append('no' if int(fields[4]) == 0 else 'yes')
    return flags


def _read_ds_column(ds_path):
    """Return the second comma-separated field of every line after the first."""
    with open(ds_path, "r") as handle:
        lines = handle.readlines()
    values = []
    for line in lines[1:]:
        if not line.strip():
            continue        # tolerate blank lines (e.g. at end of file)
        # strip() so a trailing newline/space never ends up inside a table cell
        values.append(line.split(",")[1].strip())
    return values


def Combined_table_dS_values(cutoff, CMD_ARGV, vir_nam, CURR_DIR, PATH_IN,
                             muscle_exe='C:/My Programs/Muscle/muscle3.8.31_i86win32.exe'):
    """Build a per-position table combining sequence, pairing and dS data.

    The table is written tab-separated to
    ``CURR_DIR/<vir_nam>_data_<cutoff>.txt`` with the columns
    ID, Pos, Base, Genes, dS_Values, cut_off, Paired, Constrained.

    Steps:
      1. Read the full sequence and the first record of every gene file,
         replacing gaps with 'N' (genes with sense "-" are reverse
         complemented first).
      2. Align the full sequence against each gene with MUSCLE
         (``rec_file_<k>.fasta`` inputs, concatenated output in
         ``rec_file_all_aligned.fas``, both under CURR_DIR).
      3. Stitch the pairwise alignments together: each gene contributes the
         columns from where the previous gene ended up to its own last
         non-gap column; the last gene contributes everything to the end.
      4. Map pairing flags onto full-sequence bases and dS values onto gene
         bases (one dS value per three gene bases; '-' and 'N' get '-').
      5. Mark a position "yes" in Constrained when its dS value is
         numerically below *cutoff*.

    Args:
        cutoff: dS threshold; must be convertible with ``float()``. Its
            ``str()`` is used in the output file name and cut_off column.
        CMD_ARGV: list laid out as, with n = (len(CMD_ARGV) - 2) // 3:
            [0]               full-sequence FASTA file name
            [1]               pairing file name (tab-separated, see
                              ``_read_pairing_flags``; presumably a CT file)
            [2 : 2+n]         gene FASTA file names
            [2+n : 2+2n]      dS result file names (comma-separated)
            [2+2n : 2+3n]     gene senses; "-" means reverse strand
        vir_nam: prefix for the output table file name.
        CURR_DIR: directory for intermediate and output files.
        PATH_IN: prefix prepended (plain string concatenation) to every
            input file name, so it must end with a path separator.
        muscle_exe: path of the MUSCLE executable to run.

    Raises:
        ValueError: if *cutoff* or a dS value is not numeric.
        IndexError: if the inputs are inconsistent (e.g. fewer pairing
            lines or dS values than aligned bases).
    """
    print(CURR_DIR)
    num_genes_v = (len(CMD_ARGV) - 2) // 3     # floor division on Py2 and Py3
    print(num_genes_v)

    gene_names = CMD_ARGV[2:2 + num_genes_v]
    ds_names = CMD_ARGV[2 + num_genes_v:2 + 2 * num_genes_v]
    sense_start = 2 + 2 * num_genes_v
    pairing_path = str(PATH_IN + CMD_ARGV[1])

    # Read the dS files up front so a missing file fails before MUSCLE runs.
    ds_per_gene = [_read_ds_column(PATH_IN + name) for name in ds_names]

    # --- 1. read the full sequence and the gene records -------------------
    full_records = []
    with open(PATH_IN + CMD_ARGV[0]) as full_handle:
        for record in SeqIO.parse(full_handle, 'fasta'):
            full_records.append(_gaps_to_n(record, 'full_seq'))

    print("%s %s" % ("Gene senses (+/-) ",
                     " ".join(CMD_ARGV[sense_start:sense_start + num_genes_v + 1])))

    # Only the first record of each gene file takes part in the alignment;
    # the remaining records are still parsed and normalised.
    first_gene_records = []
    for gene_no, gene_name in enumerate(gene_names, 1):
        reverse_strand = CMD_ARGV[sense_start + gene_no - 1] == "-"
        first_record = None
        with open(PATH_IN + gene_name) as gene_handle:
            for rec_no, record in enumerate(SeqIO.parse(gene_handle, 'fasta'), 1):
                if reverse_strand:
                    record.seq = record.seq.reverse_complement()
                _gaps_to_n(record, 'gene_%d_%d_' % (gene_no, rec_no))
                if first_record is None:
                    first_record = record
        first_gene_records.append(first_record)

    # --- 2. write one (full sequence, gene) pair per gene and align -------
    rec_paths = []
    for gene_no, gene_record in enumerate(first_gene_records, 1):
        if gene_record is None:
            pair = []
        else:
            pair = [full_records[0], gene_record]
        rec_path = CURR_DIR + "/" + ("rec_file_%d.fasta" % gene_no)
        with open(rec_path, 'w') as rec_out:
            SeqIO.write(pair, rec_out, 'fasta')
        rec_paths.append(rec_path)

    # Launch all alignments first, then collect them in gene order.
    # MUSCLE is given the exact path the input was written to, so this does
    # not depend on the current working directory.
    processes = []
    for rec_path in rec_paths:
        cmd = [muscle_exe, '-in', rec_path]
        # universal_newlines=True makes communicate() return text on both
        # Python 2 and Python 3.
        processes.append(subprocess.Popen(cmd,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          universal_newlines=True))

    aligned_path = CURR_DIR + "/" + 'rec_file_all_aligned.fas'
    with open(aligned_path, 'w') as aligned_out:
        for proc in processes:
            output, error = proc.communicate()
            if proc.returncode != 0:
                # NOTE: the failure is only reported; the gene's alignment
                # will be missing from the stitched result below.
                sys.stderr.write('Error occured running muscle')
                sys.stderr.write(error)
            else:
                aligned_out.write(output)
    print('alignment completed!!!!!!!!!!!')

    with open(aligned_path, 'r') as aligned_in:
        aligned_records = list(SeqIO.parse(aligned_in, 'fasta'))

    # Records come in pairs (full sequence, gene).
    aligned_full = []
    aligned_genes = []
    for k in range(0, len(aligned_records), 2):
        full_row = aligned_records[k]
        gene_row = aligned_records[k + 1]
        # MUSCLE does not guarantee input order in its output, so identify
        # the full-sequence row by its id when the pair arrives swapped.
        if gene_row.id == 'full_seq' and full_row.id != 'full_seq':
            full_row, gene_row = gene_row, full_row
        aligned_full.append(full_row)
        aligned_genes.append(gene_row)

    # --- 3. stitch the pairwise alignments into one column list ----------
    new_list_full_seq = []
    new_list_all_genes = []
    start_index = 0
    for j, gene_row in enumerate(aligned_genes):
        full_bases = list(aligned_full[j].seq)
        gene_bases = list(gene_row.seq)
        end_index = _last_non_gap_index(gene_bases)
        if end_index < 0:
            # All-gap gene row: contribute no columns, keep the boundary.
            end_index = start_index - 1
        if j == num_genes_v - 1:
            # The last gene also carries the tail of the full sequence.
            new_list_full_seq += full_bases[start_index:]
            new_list_all_genes += gene_bases[start_index:]
        else:
            new_list_full_seq += full_bases[start_index:end_index + 1]
            new_list_all_genes += gene_bases[start_index:end_index + 1]
        start_index = end_index + 1

    # Genome position: 1-based counter over non-gap full-sequence columns.
    nuc_position_list = []
    pos = 1
    for base in new_list_full_seq:
        if base == '-':
            nuc_position_list.append("-")
        else:
            nuc_position_list.append(pos)
            pos += 1

    # --- 4a. pairing flag per column --------------------------------------
    pairing_list = _read_pairing_flags(pairing_path)
    new_pairing_list = []
    next_flag = 0
    for base in new_list_full_seq:
        if base == '-':
            new_pairing_list.append("-")
        else:
            new_pairing_list.append(pairing_list[next_flag])
            next_flag += 1

    # --- 4b. dS value per codon, in genome orientation --------------------
    ds_per_codon = []
    gene_id_per_codon = []
    for gene_no, values in enumerate(ds_per_gene, 1):
        if CMD_ARGV[sense_start + gene_no - 1] == "-":
            values = values[::-1]       # reverse-strand gene: flip codon order
        ds_per_codon.extend(values)
        gene_id_per_codon.extend(["gene%d" % gene_no] * len(values))

    # Spread each codon's value over three consecutive real gene bases.
    new1_list_dS_genes = []
    new1_cutoff_list = []
    new_gene_id_list = []
    codon_index = 0
    bases_in_codon = 0
    for base in new_list_all_genes:
        if base == '-' or base == 'N':
            new1_list_dS_genes.append('-')
            new1_cutoff_list.append('-')
            new_gene_id_list.append('-')
            continue
        new1_list_dS_genes.append(ds_per_codon[codon_index])
        new1_cutoff_list.append(cutoff)
        new_gene_id_list.append(gene_id_per_codon[codon_index])
        bases_in_codon += 1
        if bases_in_codon == 3:
            bases_in_codon = 0
            codon_index += 1

    # Drop the gene id wherever there is no pairing information.
    for i, flag in enumerate(new_pairing_list):
        if flag == '-':
            new_gene_id_list[i] = '-'

    # --- 5. constrained flag ----------------------------------------------
    # dS values are read as strings, so compare numerically; a plain "<"
    # between a string and the cutoff does not order them by value.
    constr_list = []
    for ds_value in new1_list_dS_genes:
        if ds_value == "-":
            constr_list.append("-")
        elif float(ds_value) < float(cutoff):
            constr_list.append("yes")
        else:
            constr_list.append("no")

    combined_table = [new_gene_id_list, nuc_position_list, new_list_full_seq,
                      new_list_all_genes, new1_list_dS_genes, new1_cutoff_list,
                      new_pairing_list, constr_list]

    table_path = CURR_DIR + '/%s_data_%s.txt' % (vir_nam, str(cutoff))
    with open(table_path, 'w') as table_out:
        table_out.write("ID\tPos\tBase\tGenes\tdS_Values\tcut_off\tPaired\tConstrained\n")
        for i in range(len(nuc_position_list)):
            row = [str(column[i]) for column in combined_table]
            table_out.write("\t".join(row) + "\n")

	